********************************************************************
***** GENDER IDENTIFICATION:  GENDER AND COLLABORATION (LORENZO DUCTOR, SANJEEV GOYAL AND ANJA PRUMMER)
***** Start: 13.02.2020
***** Last change:  13.02.2020
***** CREATED BY: Lorenzo Ductor (lductor@ugr.es)   
***** OBJECTIVE: Code to estimate the gender using first name and country of origin
********************************************************************


/* Stage 1: build a first-name -> gender-share lookup from the gender-api export.
   This database is private and can be acquired from the website: https://gender-api.com/en/;
   we are not allowed to publish it online. */
import delimited "gender-api.csv", clear

* Lower-cased first name is the grouping and merge key.
generate firstname = lower(ga_first_name)

* Indicators: 1/0 when ga_gender is "female"/"male", missing for any other value.
generate female = cond(ga_gender == "female", 1, cond(ga_gender == "male", 0, .))
generate male   = cond(ga_gender == "male", 1, cond(ga_gender == "female", 0, .))

* Per first name: record count (all rows, whatever ga_gender holds) and the
* number of rows flagged female / male (missing indicators count as zero).
bysort firstname: generate n = _N
bysort firstname: egen sfemale = total(female)
bysort firstname: egen smale = total(male)

* Shares of the records for each first name that are female / male.
generate pfemale = sfemale / n
generate pmale = smale / n

* Discard first names that never appear as either female or male.
keep if pfemale != 0 | pmale != 0

* Reduce to one row per distinct (firstname, pfemale, pmale) and store the lookup.
keep firstname pfemale pmale
duplicates drop
save gender_countryapi, replace


/* Stage 2: attach gender information to the author list.
   The input file needs to be created from the subprocedure "ExportNetworkDataC"
   in module 'export' in EconLit_empty.mdb. */
use "netwnames1970-2017.dta", clear
	* Bring in the edited names; with -update-, values that are missing in
	* memory are filled from the using file.
	merge 1:1 authid using "netwnamesedited", update
	* This -keep- also discards the _merge variable created above.
	keep surname firstname suffix initials authid

	* First source: name-based gender from allname_stata13.dta (stored as genderUS).
	merge m:1 firstname using "allname_stata13.dta"
	drop if _merge == 2
	drop _merge
	rename gender genderUS
	rename authid auth

	* Second source: author-level file; its gender variable fills the gaps in genderUS.
	* NOTE(review): assumes genderUS_7011.dta supplies a variable named gender - confirm.
	merge 1:1 auth using "genderUS_7011.dta", nogen
	replace genderUS = gender if missing(genderUS)
	rename genderUS female
	drop gender

	* Third source: first-name gender shares from the gender_countryapi lookup.
	merge m:1 firstname using "gender_countryapi"
	drop if _merge == 2

	* Classify only when at least 95% of the lookup records for the first name
	* agree; the "< ." guards keep missing shares (unmatched names) unclassified.
	gen femaleapi = 1 if pfemale >= 0.95 & pfemale < .
	replace femaleapi = 0 if pmale >= 0.95 & pmale < .
	drop _merge

	* Remove rows that carry no author identifier.
	drop if auth == .
	save gender7017countryapi, replace
	
/* Report: 348909 authors identified using US data; 381311 authors identified using all countries */
	
	
